Traffic Analysis

Setup

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib notebook

import datetime
import sys
from typing import Iterable

sys.path.append("../../..")
In [2]:
import matplotlib
from matplotlib import pyplot
import pandas as pd

#matplotlib.rcParams['figure.figsize'] = (9.5, 10.0)
In [3]:
import netanalysis.traffic.data.model as traffic
from netanalysis.traffic.data.file_repository import FileTrafficRepository
import netanalysis.traffic.analysis.find_anomalies as fa
from netanalysis.infrastructure.resources import resource_filename

# Products to analyse; the list order is the order in which they are processed and plotted.
PRODUCT_LIST = [
    traffic.ProductId.WEB_SEARCH,
    traffic.ProductId.MAPS,
    traffic.ProductId.IMAGES,
    traffic.ProductId.YOUTUBE,
    traffic.ProductId.BLOGGER,
    traffic.ProductId.SITES,
    traffic.ProductId.GMAIL,
    traffic.ProductId.GROUPS,
    traffic.ProductId.TRANSLATE,
    traffic.ProductId.SPREADSHEETS,
    traffic.ProductId.DOCS,
    traffic.ProductId.EARTH,
]

# Traffic repository rooted at the location that resource_filename("traffic_data") resolves to.
repo = FileTrafficRepository(
    resource_filename("traffic_data")
)
In [4]:
def plot_expectations(time_series, expectations, ax=None):
    """Plot a traffic series against its expected band and flag low outliers.

    Draws, on a single axes: the observed series, the expected value as a thin
    line, a translucent band between the lower and upper bounds, and a red dot
    on every observation that falls below the lower bound.

    Args:
        time_series: observed values. It is compared element-wise against
            ``expectations.lower_bound``, so both are assumed to share the
            same index (TODO confirm against fa.get_expectations_1).
        expectations: object exposing ``expected``, ``lower_bound`` and
            ``upper_bound`` attributes aligned with ``time_series``.
        ax: matplotlib axes to draw on. Defaults to pyplot's current axes, so
            existing two-argument calls keep drawing where they did.
    """
    if ax is None:
        ax = pyplot.gca()

    time_series.plot(ax=ax)
    expectations.expected.plot(ax=ax, linewidth=1)
    ax.fill_between(
        time_series.index,
        expectations.lower_bound,
        expectations.upper_bound,
        alpha=0.3,
        linewidth=0,
    )

    # Only drops below the band are flagged; values above the upper bound are not.
    below_band = time_series < expectations.lower_bound
    anomalies = time_series[below_band]
    if not anomalies.empty:
        anomalies.plot(ax=ax, style='ro')
In [5]:
def _show_traffic_panels(requests, start: str) -> None:
    """Draw one stacked subplot per request that actually has traffic data.

    Args:
        requests: iterable of ``(label, region_code, product_id)`` tuples;
            ``label`` becomes the y-axis label of that panel.
        start: lower bound of the plotted window, used as ``series[start:]``.

    Requests whose traffic is empty are dropped *before* the grid is sized, so
    the figure has no blank rows. If nothing has data, no figure is created.
    """
    panels = []
    for label, region_code, product_id in requests:
        hires_traffic = repo.get_traffic(region_code, product_id)
        if hires_traffic.empty:
            continue
        # Downsample to daily means before computing expectations.
        panels.append((label, hires_traffic.resample("D").mean()))

    if not panels:
        return

    fig = pyplot.figure()
    for row, (label, time_series) in enumerate(panels, start=1):
        # Expectations are computed on the full series; only the plot is windowed.
        expectations = fa.get_expectations_1(time_series)
        # add_subplot makes the new axes current, which plot_expectations draws on.
        axes = fig.add_subplot(len(panels), 1, row)
        axes.set_ylabel(label)
        # 10% headroom above the maximum of the whole (un-windowed) series.
        axes.set_ylim(bottom=0, top=time_series.max() * 1.1)
        plot_expectations(time_series[start:], expectations[start:])
    # Show once, after every panel has been drawn.
    fig.show()


def show_region_traffic(region_code: str, product_ids: Iterable[traffic.ProductId],
                        start: str = "2016") -> None:
    """Plot daily traffic vs. expectations for one region, one panel per product.

    Args:
        region_code: region whose traffic is fetched from ``repo``.
        product_ids: products to plot; any iterable is accepted. Each panel is
            labelled with the product's ``name``.
        start: first date (partial date strings such as ``"2016"`` work) of the
            plotted window. Defaults to ``"2016"``.

    Products with no traffic data for the region are skipped.
    """
    _show_traffic_panels(
        [(product_id.name, region_code, product_id) for product_id in product_ids],
        start,
    )


def show_product_traffic(product_id: traffic.ProductId, regions: Iterable[str],
                         start: str = "2016") -> None:
    """Plot daily traffic vs. expectations for one product, one panel per region.

    Args:
        product_id: product whose traffic is fetched from ``repo``.
        regions: region codes to plot; any iterable (including a generator) is
            accepted. Each panel is labelled with its region code.
        start: first date (partial date strings such as ``"2016"`` work) of the
            plotted window. Defaults to ``"2016"``.

    Regions with no traffic data for the product are skipped.
    """
    _show_traffic_panels(
        [(region_code, region_code, product_id) for region_code in regions],
        start,
    )

Analysis

In [6]:
# Two-letter region codes to scan for disruptions
# (presumably ISO 3166-1 alpha-2 — confirm against the traffic repository's naming).
# Order matters: regions are processed in list order.
INTERESTING_REGIONS = [
    "DZ", "BY", "CM", "CD", "EG", "ET",
    "GA", "GM", "IN", "IR", "IQ", "PK",
    "SA", "SY", "TG", "TR", "UA", "VN",
    "PR", "VI", "TC", "US",
]
In [7]:
# Control case: the US is plotted as the reference region.
show_region_traffic(
    region_code="US",
    product_ids=PRODUCT_LIST,
)
In [8]:
# Scan every region of interest across every product in PRODUCT_LIST.
all_disruptions = fa.find_all_disruptions(
    repo,
    INTERESTING_REGIONS,
    PRODUCT_LIST,
)
# Order in place, latest first: descending by start, ties broken by end.
all_disruptions.sort(
    key=lambda disruption: (disruption.start, disruption.end),
    reverse=True,
)
INFO:root:Processing region DZ product WEB_SEARCH
INFO:root:Found 0 major product disruptions from 4 disruptions and 6 anomalies
INFO:root:Processing region DZ product MAPS
INFO:root:Empty time series for region DZ product MAPS
INFO:root:Processing region DZ product IMAGES
INFO:root:Empty time series for region DZ product IMAGES
INFO:root:Processing region DZ product YOUTUBE
INFO:root:Found 0 major product disruptions from 6 disruptions and 11 anomalies
INFO:root:Processing region DZ product BLOGGER
INFO:root:Found no anomalies
INFO:root:Processing region DZ product SITES
INFO:root:Found 0 major product disruptions from 1 disruptions and 1 anomalies
INFO:root:Processing region DZ product GMAIL
INFO:root:Empty time series for region DZ product GMAIL
INFO:root:Processing region DZ product GROUPS
INFO:root:Found 1 major product disruptions from 1 disruptions and 1 anomalies
INFO:root:Processing region DZ product TRANSLATE
INFO:root:Found 1 major product disruptions from 6 disruptions and 10 anomalies
INFO:root:Processing region DZ product SPREADSHEETS
INFO:root:Empty time series for region DZ product SPREADSHEETS
INFO:root:Processing region DZ product DOCS
INFO:root:Empty time series for region DZ product DOCS
INFO:root:Processing region DZ product EARTH
INFO:root:Empty time series for region DZ product EARTH
INFO:root:Found 2 region disruptions from 2 product disruptions for DZ
INFO:root:Processing region BY product WEB_SEARCH
INFO:root:Found 0 major product disruptions from 6 disruptions and 9 anomalies
INFO:root:Processing region BY product MAPS
INFO:root:Empty time series for region BY product MAPS
INFO:root:Processing region BY product IMAGES
INFO:root:Empty time series for region BY product IMAGES
INFO:root:Processing region BY product YOUTUBE
INFO:root:Found 0 major product disruptions from 4 disruptions and 16 anomalies
INFO:root:Processing region BY product BLOGGER
INFO:root:Found 1 major product disruptions from 2 disruptions and 3 anomalies
INFO:root:Processing region BY product SITES
INFO:root:Found no anomalies
INFO:root:Processing region BY product GMAIL
INFO:root:Empty time series for region BY product GMAIL
INFO:root:Processing region BY product GROUPS
INFO:root:Found no anomalies
INFO:root:Processing region BY product TRANSLATE
INFO:root:Found 2 major product disruptions from 6 disruptions and 21 anomalies
INFO:root:Processing region BY product SPREADSHEETS
INFO:root:Empty time series for region BY product SPREADSHEETS
INFO:root:Processing region BY product DOCS
INFO:root:Empty time series for region BY product DOCS
INFO:root:Processing region BY product EARTH
INFO:root:Empty time series for region BY product EARTH
INFO:root:Found 3 region disruptions from 3 product disruptions for BY
INFO:root:Processing region CM product WEB_SEARCH
INFO:root:Found 0 major product disruptions from 6 disruptions and 6 anomalies
INFO:root:Processing region CM product MAPS
INFO:root:Empty time series for region CM product MAPS
INFO:root:Processing region CM product IMAGES
INFO:root:Empty time series for region CM product IMAGES
INFO:root:Processing region CM product YOUTUBE
INFO:root:Found 0 major product disruptions from 2 disruptions and 2 anomalies
INFO:root:Processing region CM product BLOGGER
INFO:root:Found no anomalies
INFO:root:Processing region CM product SITES
INFO:root:Found no anomalies
INFO:root:Processing region CM product GMAIL
INFO:root:Empty time series for region CM product GMAIL
INFO:root:Processing region CM product GROUPS
INFO:root:Found no anomalies
INFO:root:Processing region CM product TRANSLATE
INFO:root:Found 1 major product disruptions from 7 disruptions and 12 anomalies
INFO:root:Processing region CM product SPREADSHEETS
INFO:root:Empty time series for region CM product SPREADSHEETS
INFO:root:Processing region CM product DOCS
INFO:root:Empty time series for region CM product DOCS
INFO:root:Processing region CM product EARTH
INFO:root:Empty time series for region CM product EARTH
INFO:root:Found 1 region disruptions from 1 product disruptions for CM
INFO:root:Processing region CD product WEB_SEARCH
INFO:root:Found 5 major product disruptions from 5 disruptions and 47 anomalies
INFO:root:Processing region CD product MAPS
INFO:root:Empty time series for region CD product MAPS
INFO:root:Processing region CD product IMAGES
INFO:root:Empty time series for region CD product IMAGES
INFO:root:Processing region CD product YOUTUBE
INFO:root:Found 6 major product disruptions from 6 disruptions and 44 anomalies
INFO:root:Processing region CD product BLOGGER
INFO:root:Found 3 major product disruptions from 4 disruptions and 23 anomalies
INFO:root:Processing region CD product SITES
INFO:root:Found 1 major product disruptions from 1 disruptions and 26 anomalies
INFO:root:Processing region CD product GMAIL
INFO:root:Empty time series for region CD product GMAIL
INFO:root:Processing region CD product GROUPS
INFO:root:Empty time series for region CD product GROUPS
INFO:root:Processing region CD product TRANSLATE
INFO:root:Found 5 major product disruptions from 5 disruptions and 32 anomalies
INFO:root:Processing region CD product SPREADSHEETS
INFO:root:Empty time series for region CD product SPREADSHEETS
INFO:root:Processing region CD product DOCS
INFO:root:Empty time series for region CD product DOCS
INFO:root:Processing region CD product EARTH
INFO:root:Empty time series for region CD product EARTH
INFO:root:Found 8 region disruptions from 20 product disruptions for CD
INFO:root:Processing region EG product WEB_SEARCH
INFO:root:Found 0 major product disruptions from 2 disruptions and 4 anomalies
INFO:root:Processing region EG product MAPS
INFO:root:Empty time series for region EG product MAPS
INFO:root:Processing region EG product IMAGES
INFO:root:Empty time series for region EG product IMAGES
INFO:root:Processing region EG product YOUTUBE
INFO:root:Found 0 major product disruptions from 4 disruptions and 5 anomalies
INFO:root:Processing region EG product BLOGGER
INFO:root:Found 0 major product disruptions from 1 disruptions and 1 anomalies
INFO:root:Processing region EG product SITES
INFO:root:Found 1 major product disruptions from 3 disruptions and 7 anomalies
INFO:root:Processing region EG product GMAIL
INFO:root:Empty time series for region EG product GMAIL
INFO:root:Processing region EG product GROUPS
INFO:root:Found 0 major product disruptions from 1 disruptions and 1 anomalies
INFO:root:Processing region EG product TRANSLATE
INFO:root:Found 0 major product disruptions from 2 disruptions and 6 anomalies
INFO:root:Processing region EG product SPREADSHEETS
INFO:root:Empty time series for region EG product SPREADSHEETS
INFO:root:Processing region EG product DOCS
INFO:root:Empty time series for region EG product DOCS
INFO:root:Processing region EG product EARTH
INFO:root:Empty time series for region EG product EARTH
INFO:root:Found 1 region disruptions from 1 product disruptions for EG
INFO:root:Processing region ET product WEB_SEARCH
INFO:root:Found 4 major product disruptions from 7 disruptions and 26 anomalies
INFO:root:Processing region ET product MAPS
INFO:root:Empty time series for region ET product MAPS
INFO:root:Processing region ET product IMAGES
INFO:root:Empty time series for region ET product IMAGES
INFO:root:Processing region ET product YOUTUBE
INFO:root:Found 3 major product disruptions from 9 disruptions and 34 anomalies
INFO:root:Processing region ET product BLOGGER
INFO:root:Found 3 major product disruptions from 3 disruptions and 22 anomalies
INFO:root:Processing region ET product SITES
INFO:root:Found no anomalies
INFO:root:Processing region ET product GMAIL
INFO:root:Empty time series for region ET product GMAIL
INFO:root:Processing region ET product GROUPS
INFO:root:Found 1 major product disruptions from 1 disruptions and 1 anomalies
INFO:root:Processing region ET product TRANSLATE
INFO:root:Found 3 major product disruptions from 4 disruptions and 8 anomalies
INFO:root:Processing region ET product SPREADSHEETS
INFO:root:Empty time series for region ET product SPREADSHEETS
INFO:root:Processing region ET product DOCS
INFO:root:Empty time series for region ET product DOCS
INFO:root:Processing region ET product EARTH
INFO:root:Empty time series for region ET product EARTH
INFO:root:Found 5 region disruptions from 14 product disruptions for ET
INFO:root:Processing region GA product WEB_SEARCH
INFO:root:Found 4 major product disruptions from 9 disruptions and 27 anomalies
INFO:root:Processing region GA product MAPS
INFO:root:Empty time series for region GA product MAPS
INFO:root:Processing region GA product IMAGES
INFO:root:Empty time series for region GA product IMAGES
INFO:root:Processing region GA product YOUTUBE
INFO:root:Found 5 major product disruptions from 6 disruptions and 34 anomalies
INFO:root:Processing region GA product BLOGGER
INFO:root:Found 2 major product disruptions from 4 disruptions and 9 anomalies
INFO:root:Processing region GA product SITES
INFO:root:Found no anomalies
INFO:root:Processing region GA product GMAIL
INFO:root:Empty time series for region GA product GMAIL
INFO:root:Processing region GA product GROUPS
INFO:root:Empty time series for region GA product GROUPS
INFO:root:Processing region GA product TRANSLATE
INFO:root:Found 4 major product disruptions from 5 disruptions and 11 anomalies
INFO:root:Processing region GA product SPREADSHEETS
INFO:root:Empty time series for region GA product SPREADSHEETS
INFO:root:Processing region GA product DOCS
INFO:root:Empty time series for region GA product DOCS
INFO:root:Processing region GA product EARTH
INFO:root:Empty time series for region GA product EARTH
INFO:root:Found 5 region disruptions from 15 product disruptions for GA
INFO:root:Processing region GM product WEB_SEARCH
INFO:root:Found 2 major product disruptions from 8 disruptions and 18 anomalies
INFO:root:Processing region GM product MAPS
INFO:root:Empty time series for region GM product MAPS
INFO:root:Processing region GM product IMAGES
INFO:root:Empty time series for region GM product IMAGES
INFO:root:Processing region GM product YOUTUBE
INFO:root:Found 2 major product disruptions from 10 disruptions and 16 anomalies
INFO:root:Processing region GM product BLOGGER
INFO:root:Found 1 major product disruptions from 1 disruptions and 1 anomalies
INFO:root:Processing region GM product SITES
INFO:root:Found no anomalies
INFO:root:Processing region GM product GMAIL
INFO:root:Empty time series for region GM product GMAIL
INFO:root:Processing region GM product GROUPS
INFO:root:Empty time series for region GM product GROUPS
INFO:root:Processing region GM product TRANSLATE
INFO:root:Found 3 major product disruptions from 6 disruptions and 11 anomalies
INFO:root:Processing region GM product SPREADSHEETS
INFO:root:Empty time series for region GM product SPREADSHEETS
INFO:root:Processing region GM product DOCS
INFO:root:Empty time series for region GM product DOCS
INFO:root:Processing region GM product EARTH
INFO:root:Empty time series for region GM product EARTH
INFO:root:Found 5 region disruptions from 8 product disruptions for GM
INFO:root:Processing region IN product WEB_SEARCH
INFO:root:Found 0 major product disruptions from 8 disruptions and 11 anomalies
INFO:root:Processing region IN product MAPS
INFO:root:Empty time series for region IN product MAPS
INFO:root:Processing region IN product IMAGES
INFO:root:Empty time series for region IN product IMAGES
INFO:root:Processing region IN product YOUTUBE
INFO:root:Found no anomalies
INFO:root:Processing region IN product BLOGGER
INFO:root:Found 0 major product disruptions from 1 disruptions and 4 anomalies
INFO:root:Processing region IN product SITES
INFO:root:Found 1 major product disruptions from 3 disruptions and 15 anomalies
INFO:root:Processing region IN product GMAIL
INFO:root:Empty time series for region IN product GMAIL
INFO:root:Processing region IN product GROUPS
INFO:root:Found 1 major product disruptions from 4 disruptions and 5 anomalies
INFO:root:Processing region IN product TRANSLATE
INFO:root:Found 1 major product disruptions from 1 disruptions and 5 anomalies
INFO:root:Processing region IN product SPREADSHEETS
INFO:root:Empty time series for region IN product SPREADSHEETS
INFO:root:Processing region IN product DOCS
INFO:root:Empty time series for region IN product DOCS
INFO:root:Processing region IN product EARTH
INFO:root:Empty time series for region IN product EARTH
INFO:root:Found 3 region disruptions from 3 product disruptions for IN
INFO:root:Processing region IR product WEB_SEARCH
INFO:root:Found 0 major product disruptions from 8 disruptions and 13 anomalies
INFO:root:Processing region IR product MAPS
INFO:root:Empty time series for region IR product MAPS
INFO:root:Processing region IR product IMAGES
INFO:root:Empty time series for region IR product IMAGES
INFO:root:Processing region IR product YOUTUBE
INFO:root:Found 3 major product disruptions from 3 disruptions and 11 anomalies
INFO:root:Processing region IR product BLOGGER
INFO:root:Found 1 major product disruptions from 1 disruptions and 20 anomalies
INFO:root:Processing region IR product SITES
INFO:root:Found 1 major product disruptions from 1 disruptions and 5 anomalies
INFO:root:Processing region IR product GMAIL
INFO:root:Empty time series for region IR product GMAIL
INFO:root:Processing region IR product GROUPS
INFO:root:Found 2 major product disruptions from 2 disruptions and 4 anomalies
INFO:root:Processing region IR product TRANSLATE
INFO:root:Found 4 major product disruptions from 7 disruptions and 15 anomalies
INFO:root:Processing region IR product SPREADSHEETS
INFO:root:Empty time series for region IR product SPREADSHEETS
INFO:root:Processing region IR product DOCS
INFO:root:Empty time series for region IR product DOCS
INFO:root:Processing region IR product EARTH
INFO:root:Empty time series for region IR product EARTH
INFO:root:Found 10 region disruptions from 11 product disruptions for IR
INFO:root:Processing region IQ product WEB_SEARCH
INFO:root:Found 1 major product disruptions from 2 disruptions and 11 anomalies
INFO:root:Processing region IQ product MAPS
INFO:root:Empty time series for region IQ product MAPS
INFO:root:Processing region IQ product IMAGES
INFO:root:Empty time series for region IQ product IMAGES
INFO:root:Processing region IQ product YOUTUBE
INFO:root:Found 1 major product disruptions from 3 disruptions and 36 anomalies
INFO:root:Processing region IQ product BLOGGER
INFO:root:Found 1 major product disruptions from 1 disruptions and 12 anomalies
INFO:root:Processing region IQ product SITES
INFO:root:Found 2 major product disruptions from 3 disruptions and 7 anomalies
INFO:root:Processing region IQ product GMAIL
INFO:root:Empty time series for region IQ product GMAIL
INFO:root:Processing region IQ product GROUPS
INFO:root:Found 2 major product disruptions from 2 disruptions and 3 anomalies
INFO:root:Processing region IQ product TRANSLATE
INFO:root:Found 3 major product disruptions from 5 disruptions and 19 anomalies
INFO:root:Processing region IQ product SPREADSHEETS
INFO:root:Empty time series for region IQ product SPREADSHEETS
INFO:root:Processing region IQ product DOCS
INFO:root:Empty time series for region IQ product DOCS
INFO:root:Processing region IQ product EARTH
INFO:root:Empty time series for region IQ product EARTH
INFO:root:Found 7 region disruptions from 10 product disruptions for IQ
INFO:root:Processing region PK product WEB_SEARCH
INFO:root:Found 0 major product disruptions from 7 disruptions and 8 anomalies
INFO:root:Processing region PK product MAPS
INFO:root:Empty time series for region PK product MAPS
INFO:root:Processing region PK product IMAGES
INFO:root:Empty time series for region PK product IMAGES
INFO:root:Processing region PK product YOUTUBE
INFO:root:Found 1 major product disruptions from 4 disruptions and 5 anomalies
INFO:root:Processing region PK product BLOGGER
INFO:root:Found 0 major product disruptions from 4 disruptions and 4 anomalies
INFO:root:Processing region PK product SITES
INFO:root:Found 0 major product disruptions from 2 disruptions and 2 anomalies
INFO:root:Processing region PK product GMAIL
INFO:root:Empty time series for region PK product GMAIL
INFO:root:Processing region PK product GROUPS
INFO:root:Found 0 major product disruptions from 1 disruptions and 1 anomalies
INFO:root:Processing region PK product TRANSLATE
INFO:root:Found 1 major product disruptions from 4 disruptions and 15 anomalies
INFO:root:Processing region PK product SPREADSHEETS
INFO:root:Empty time series for region PK product SPREADSHEETS
INFO:root:Processing region PK product DOCS
INFO:root:Empty time series for region PK product DOCS
INFO:root:Processing region PK product EARTH
INFO:root:Empty time series for region PK product EARTH
INFO:root:Found 2 region disruptions from 2 product disruptions for PK
INFO:root:Processing region SA product WEB_SEARCH
INFO:root:Found 0 major product disruptions from 9 disruptions and 13 anomalies
INFO:root:Processing region SA product MAPS
INFO:root:Empty time series for region SA product MAPS
INFO:root:Processing region SA product IMAGES
INFO:root:Empty time series for region SA product IMAGES
INFO:root:Processing region SA product YOUTUBE
INFO:root:Found 0 major product disruptions from 5 disruptions and 25 anomalies
INFO:root:Processing region SA product BLOGGER
INFO:root:Found 1 major product disruptions from 1 disruptions and 2 anomalies
INFO:root:Processing region SA product SITES
INFO:root:Found 0 major product disruptions from 1 disruptions and 1 anomalies
INFO:root:Processing region SA product GMAIL
INFO:root:Empty time series for region SA product GMAIL
INFO:root:Processing region SA product GROUPS
INFO:root:Found no anomalies
INFO:root:Processing region SA product TRANSLATE
INFO:root:Found 1 major product disruptions from 4 disruptions and 6 anomalies
INFO:root:Processing region SA product SPREADSHEETS
INFO:root:Empty time series for region SA product SPREADSHEETS
INFO:root:Processing region SA product DOCS
INFO:root:Empty time series for region SA product DOCS
INFO:root:Processing region SA product EARTH
INFO:root:Empty time series for region SA product EARTH
INFO:root:Found 2 region disruptions from 2 product disruptions for SA
INFO:root:Processing region SY product WEB_SEARCH
INFO:root:Found 0 major product disruptions from 4 disruptions and 4 anomalies
INFO:root:Processing region SY product MAPS
INFO:root:Empty time series for region SY product MAPS
INFO:root:Processing region SY product IMAGES
INFO:root:Empty time series for region SY product IMAGES
INFO:root:Processing region SY product YOUTUBE
INFO:root:Found 0 major product disruptions from 7 disruptions and 13 anomalies
INFO:root:Processing region SY product BLOGGER
INFO:root:Found no anomalies
INFO:root:Processing region SY product SITES
INFO:root:Found 3 major product disruptions from 3 disruptions and 30 anomalies
INFO:root:Processing region SY product GMAIL
INFO:root:Empty time series for region SY product GMAIL
INFO:root:Processing region SY product GROUPS
INFO:root:Found no anomalies
INFO:root:Processing region SY product TRANSLATE
INFO:root:Found 1 major product disruptions from 2 disruptions and 7 anomalies
INFO:root:Processing region SY product SPREADSHEETS
INFO:root:Empty time series for region SY product SPREADSHEETS
INFO:root:Processing region SY product DOCS
INFO:root:Empty time series for region SY product DOCS
INFO:root:Processing region SY product EARTH
INFO:root:Empty time series for region SY product EARTH
INFO:root:Found 4 region disruptions from 4 product disruptions for SY
INFO:root:Processing region TG product WEB_SEARCH
INFO:root:Found 3 major product disruptions from 3 disruptions and 15 anomalies
INFO:root:Processing region TG product MAPS
INFO:root:Empty time series for region TG product MAPS
INFO:root:Processing region TG product IMAGES
INFO:root:Empty time series for region TG product IMAGES
INFO:root:Processing region TG product YOUTUBE
INFO:root:Found 1 major product disruptions from 5 disruptions and 9 anomalies
INFO:root:Processing region TG product BLOGGER
INFO:root:Found 2 major product disruptions from 5 disruptions and 10 anomalies
INFO:root:Processing region TG product SITES
INFO:root:Found no anomalies
INFO:root:Processing region TG product GMAIL
INFO:root:Empty time series for region TG product GMAIL
INFO:root:Processing region TG product GROUPS
INFO:root:Found no anomalies
INFO:root:Processing region TG product TRANSLATE
INFO:root:Found 2 major product disruptions from 8 disruptions and 17 anomalies
INFO:root:Processing region TG product SPREADSHEETS
INFO:root:Empty time series for region TG product SPREADSHEETS
INFO:root:Processing region TG product DOCS
INFO:root:Empty time series for region TG product DOCS
INFO:root:Processing region TG product EARTH
INFO:root:Empty time series for region TG product EARTH
INFO:root:Found 4 region disruptions from 8 product disruptions for TG
INFO:root:Processing region TR product WEB_SEARCH
INFO:root:Found 0 major product disruptions from 8 disruptions and 22 anomalies
INFO:root:Processing region TR product MAPS
INFO:root:Empty time series for region TR product MAPS
INFO:root:Processing region TR product IMAGES
INFO:root:Empty time series for region TR product IMAGES
INFO:root:Processing region TR product YOUTUBE
INFO:root:Found 2 major product disruptions from 4 disruptions and 26 anomalies
INFO:root:Processing region TR product BLOGGER
INFO:root:Found 0 major product disruptions from 4 disruptions and 5 anomalies
INFO:root:Processing region TR product SITES
INFO:root:Found 2 major product disruptions from 4 disruptions and 10 anomalies
INFO:root:Processing region TR product GMAIL
INFO:root:Empty time series for region TR product GMAIL
INFO:root:Processing region TR product GROUPS
INFO:root:Found 1 major product disruptions from 1 disruptions and 3 anomalies
INFO:root:Processing region TR product TRANSLATE
INFO:root:Found 0 major product disruptions from 2 disruptions and 8 anomalies
INFO:root:Processing region TR product SPREADSHEETS
INFO:root:Empty time series for region TR product SPREADSHEETS
INFO:root:Processing region TR product DOCS
INFO:root:Empty time series for region TR product DOCS
INFO:root:Processing region TR product EARTH
INFO:root:Empty time series for region TR product EARTH
INFO:root:Found 5 region disruptions from 5 product disruptions for TR
INFO:root:Processing region UA product WEB_SEARCH
INFO:root:Found 0 major product disruptions from 10 disruptions and 16 anomalies
INFO:root:Processing region UA product MAPS
INFO:root:Empty time series for region UA product MAPS
INFO:root:Processing region UA product IMAGES
INFO:root:Empty time series for region UA product IMAGES
INFO:root:Processing region UA product YOUTUBE
INFO:root:Found 0 major product disruptions from 4 disruptions and 25 anomalies
INFO:root:Processing region UA product BLOGGER
INFO:root:Found no anomalies
INFO:root:Processing region UA product SITES
INFO:root:Found no anomalies
INFO:root:Processing region UA product GMAIL
INFO:root:Empty time series for region UA product GMAIL
INFO:root:Processing region UA product GROUPS
INFO:root:Found 2 major product disruptions from 2 disruptions and 3 anomalies
INFO:root:Processing region UA product TRANSLATE
INFO:root:Found 1 major product disruptions from 5 disruptions and 10 anomalies
INFO:root:Processing region UA product SPREADSHEETS
INFO:root:Empty time series for region UA product SPREADSHEETS
INFO:root:Processing region UA product DOCS
INFO:root:Empty time series for region UA product DOCS
INFO:root:Processing region UA product EARTH
INFO:root:Empty time series for region UA product EARTH
INFO:root:Found 3 region disruptions from 3 product disruptions for UA
INFO:root:Processing region VN product WEB_SEARCH
INFO:root:Found 0 major product disruptions from 4 disruptions and 22 anomalies
INFO:root:Processing region VN product MAPS
INFO:root:Empty time series for region VN product MAPS
INFO:root:Processing region VN product IMAGES
INFO:root:Empty time series for region VN product IMAGES
INFO:root:Processing region VN product YOUTUBE
INFO:root:Found 0 major product disruptions from 2 disruptions and 2 anomalies
INFO:root:Processing region VN product BLOGGER
INFO:root:Found 0 major product disruptions from 2 disruptions and 6 anomalies
INFO:root:Processing region VN product SITES
INFO:root:Found no anomalies
INFO:root:Processing region VN product GMAIL
INFO:root:Empty time series for region VN product GMAIL
INFO:root:Processing region VN product GROUPS
INFO:root:Found 1 major product disruptions from 1 disruptions and 4 anomalies
INFO:root:Processing region VN product TRANSLATE
INFO:root:Found 1 major product disruptions from 6 disruptions and 28 anomalies
INFO:root:Processing region VN product SPREADSHEETS
INFO:root:Empty time series for region VN product SPREADSHEETS
INFO:root:Processing region VN product DOCS
INFO:root:Empty time series for region VN product DOCS
INFO:root:Processing region VN product EARTH
INFO:root:Empty time series for region VN product EARTH
INFO:root:Found 1 region disruptions from 2 product disruptions for VN
INFO:root:Processing region PR product WEB_SEARCH
INFO:root:Found 2 major product disruptions from 3 disruptions and 31 anomalies
INFO:root:Processing region PR product MAPS
INFO:root:Empty time series for region PR product MAPS
INFO:root:Processing region PR product IMAGES
INFO:root:Empty time series for region PR product IMAGES
INFO:root:Processing region PR product YOUTUBE
INFO:root:Found 2 major product disruptions from 3 disruptions and 31 anomalies
INFO:root:Processing region PR product BLOGGER
INFO:root:Found 2 major product disruptions from 3 disruptions and 21 anomalies
INFO:root:Processing region PR product SITES
INFO:root:Found 0 major product disruptions from 2 disruptions and 2 anomalies
INFO:root:Processing region PR product GMAIL
INFO:root:Empty time series for region PR product GMAIL
INFO:root:Processing region PR product GROUPS
INFO:root:Found no anomalies
INFO:root:Processing region PR product TRANSLATE
INFO:root:Found 2 major product disruptions from 3 disruptions and 26 anomalies
INFO:root:Processing region PR product SPREADSHEETS
INFO:root:Empty time series for region PR product SPREADSHEETS
INFO:root:Processing region PR product DOCS
INFO:root:Empty time series for region PR product DOCS
INFO:root:Processing region PR product EARTH
INFO:root:Empty time series for region PR product EARTH
INFO:root:Found 2 region disruptions from 8 product disruptions for PR
INFO:root:Processing region VI product WEB_SEARCH
INFO:root:Found 1 major product disruptions from 1 disruptions and 35 anomalies
INFO:root:Processing region VI product MAPS
INFO:root:Empty time series for region VI product MAPS
INFO:root:Processing region VI product IMAGES
INFO:root:Empty time series for region VI product IMAGES
INFO:root:Processing region VI product YOUTUBE
INFO:root:Found 1 major product disruptions from 1 disruptions and 39 anomalies
INFO:root:Processing region VI product BLOGGER
INFO:root:Found 1 major product disruptions from 1 disruptions and 2 anomalies
INFO:root:Processing region VI product SITES
INFO:root:Found no anomalies
INFO:root:Processing region VI product GMAIL
INFO:root:Empty time series for region VI product GMAIL
INFO:root:Processing region VI product GROUPS
INFO:root:Empty time series for region VI product GROUPS
INFO:root:Processing region VI product TRANSLATE
INFO:root:Found 1 major product disruptions from 1 disruptions and 30 anomalies
INFO:root:Processing region VI product SPREADSHEETS
INFO:root:Empty time series for region VI product SPREADSHEETS
INFO:root:Processing region VI product DOCS
INFO:root:Empty time series for region VI product DOCS
INFO:root:Processing region VI product EARTH
INFO:root:Empty time series for region VI product EARTH
INFO:root:Found 1 region disruptions from 4 product disruptions for VI
INFO:root:Processing region TC product WEB_SEARCH
INFO:root:Found 2 major product disruptions from 2 disruptions and 23 anomalies
INFO:root:Processing region TC product MAPS
INFO:root:Empty time series for region TC product MAPS
INFO:root:Processing region TC product IMAGES
INFO:root:Empty time series for region TC product IMAGES
INFO:root:Processing region TC product YOUTUBE
INFO:root:Found 2 major product disruptions from 3 disruptions and 27 anomalies
INFO:root:Processing region TC product BLOGGER
INFO:root:Found 3 major product disruptions from 3 disruptions and 17 anomalies
INFO:root:Processing region TC product SITES
INFO:root:Empty time series for region TC product SITES
INFO:root:Processing region TC product GMAIL
INFO:root:Empty time series for region TC product GMAIL
INFO:root:Processing region TC product GROUPS
INFO:root:Empty time series for region TC product GROUPS
INFO:root:Processing region TC product TRANSLATE
INFO:root:Found 2 major product disruptions from 2 disruptions and 24 anomalies
INFO:root:Processing region TC product SPREADSHEETS
INFO:root:Empty time series for region TC product SPREADSHEETS
INFO:root:Processing region TC product DOCS
INFO:root:Empty time series for region TC product DOCS
INFO:root:Processing region TC product EARTH
INFO:root:Empty time series for region TC product EARTH
INFO:root:Found 4 region disruptions from 9 product disruptions for TC
INFO:root:Processing region US product WEB_SEARCH
INFO:root:Found 0 major product disruptions from 14 disruptions and 20 anomalies
INFO:root:Processing region US product MAPS
INFO:root:Empty time series for region US product MAPS
INFO:root:Processing region US product IMAGES
INFO:root:Empty time series for region US product IMAGES
INFO:root:Processing region US product YOUTUBE
INFO:root:Found no anomalies
INFO:root:Processing region US product BLOGGER
INFO:root:Found 0 major product disruptions from 2 disruptions and 2 anomalies
INFO:root:Processing region US product SITES
INFO:root:Found 0 major product disruptions from 8 disruptions and 10 anomalies
INFO:root:Processing region US product GMAIL
INFO:root:Empty time series for region US product GMAIL
INFO:root:Processing region US product GROUPS
INFO:root:Found 0 major product disruptions from 4 disruptions and 11 anomalies
INFO:root:Processing region US product TRANSLATE
INFO:root:Found 0 major product disruptions from 4 disruptions and 6 anomalies
INFO:root:Processing region US product SPREADSHEETS
INFO:root:Empty time series for region US product SPREADSHEETS
INFO:root:Processing region US product DOCS
INFO:root:Empty time series for region US product DOCS
INFO:root:Processing region US product EARTH
INFO:root:Empty time series for region US product EARTH
INFO:root:Found 0 region disruptions from 0 product disruptions for US

Internet Shutdowns

In [9]:
# Close figures left over from earlier cells; one new figure is opened per
# shutdown below.
pyplot.close('all')
# A region disruption counts as an internet shutdown when at least two
# products were disrupted together.
internet_shutdowns = [rd for rd in all_disruptions if len(rd.product_disruptions) >= 2]
print("Found %s shutdowns" % len(internet_shutdowns))

for region_disruption in internet_shutdowns:
    num_product_disruptions = len(region_disruption.product_disruptions)
    num_columns = 3
    # Integer ceiling division: add_subplot needs an integer row count, and the
    # grid must be sized for the number of columns actually used.
    num_rows = (num_product_disruptions + num_columns - 1) // num_columns
    fig = pyplot.figure(figsize=(num_columns * 3.2, num_rows * 2.5))
    fig.suptitle("%s %s - %s" % (region_disruption.region_code, region_disruption.start.date(), region_disruption.end.date()))
    # Treat the disruption's end day as inclusive when measuring its duration.
    end_date = region_disruption.end + datetime.timedelta(days=1)
    duration = end_date - region_disruption.start
    # Show twice the disruption's duration of context on each side, but never
    # extend the chart past the current time.
    chart_padding = duration * 2
    chart_start_date = region_disruption.start - chart_padding
    chart_end_date = min(end_date + chart_padding, datetime.datetime.now())
    for index, product_disruption in enumerate(region_disruption.product_disruptions):
        chart_traffic = repo.get_traffic(
            region_disruption.region_code, product_disruption.product_id)[chart_start_date:chart_end_date]
        axes = fig.add_subplot(num_rows, num_columns, index + 1)
        axes.set_ylabel(product_disruption.product_id.name)
        axes.set_ylim(bottom=0, top=chart_traffic.max()*1.1)
        axes.plot(chart_traffic)
        # Shade the region-level disruption interval on every product chart.
        axes.axvspan(region_disruption.start, region_disruption.end, alpha=0.2, color='grey')
    # tight_layout only has an effect once the axes exist; reserve the top
    # strip of the figure for the suptitle.
    fig.tight_layout(rect=(0, 0, 1, 0.95))
    fig.show()

    #fa.print_disruption_csv(region_disruption)
Found 26 shutdowns
/usr/local/google/home/fortuna/firehook/net-analysis-github/.venv/lib/python3.6/site-packages/matplotlib/pyplot.py:514: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  max_open_warning, RuntimeWarning)

Findings

We can find internet shutdowns by looking at cross-product disruptions

In the example below, we can see shutdowns in July, August, and October 2016, and at the end of May 2017.

In [10]:
# Plot the daily traffic of every product in PRODUCT_LIST for region "ET" from
# 2016 onwards, with the expected range shaded and days below the lower bound
# marked in red, so drops that coincide across products are easy to spot.
show_region_traffic("ET", PRODUCT_LIST)

Anomalies in the US traffic can create anomalies in smaller countries

That happens because the traffic numbers are relative to global traffic, and US traffic dominates the global traffic for some products.

In the example below, the increase in US traffic in October 2016 caused a traffic drop in the other countries.

In [11]:
# Chart YouTube traffic for several regions together with "US", so the US
# series can be compared against the others.
# NOTE(review): show_product_traffic is defined in an earlier cell that is not
# visible here; confirm what it plots per region.
show_product_traffic(traffic.ProductId.YOUTUBE, [
    "IR", "IQ", "PK", "SA", "SY", "TR", "VN", "PR", "TC", "US"
])
/usr/local/google/home/fortuna/firehook/net-analysis-github/.venv/lib/python3.6/site-packages/matplotlib/pyplot.py:514: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  max_open_warning, RuntimeWarning)
In [ ]: